Load data
We download the data from segerstolpe et al, hosted [here]
suppressPackageStartupMessages({
library(SingleCellExperiment)
library(stringr)
library(scRNAseq)
})
sce <- BaronPancreasData()
# We select the largest batch to avoid pre-processing issues
# sce <- sce[, sce$donor == "GSM2230759"]
filt <- rowSums(counts(sce) >= 2) >= 10
sce <- sce[filt, ]
sce
## class: SingleCellExperiment
## dim: 12336 8569
## metadata(0):
## assays(1): counts
## rownames(12336): A1CF A2M ... ZZZ3 pk
## rowData names(0):
## colnames(8569): human1_lib1.final_cell_0001 human1_lib1.final_cell_0002
## ... human4_lib3.final_cell_0700 human4_lib3.final_cell_0701
## colData names(2): donor label
## reducedDimNames(0):
## altExpNames(0):
rm(filt)
Pre-processing
The dataset can be normalized using SCVI
suppressPackageStartupMessages({
library(Seurat)
})
pre_process_time <- system.time({
se <- CreateSeuratObject(counts = counts(sce),
min.cells = 0,
min.features = 0,
project = "de")
se <- AddMetaData(se, as.data.frame(colData(sce)))
se <- NormalizeData(se, verbose = FALSE)
se <- FindVariableFeatures(se, selection.method = 'vst', nfeatures = 4000,
verbose = FALSE)
se <- se[VariableFeatures(se), ]
se <- ScaleData(object = se, vars.to.regress = c("nCount_RNA", "donor"))
sce <- as.SingleCellExperiment(se)
})
suppressPackageStartupMessages({
library(reticulate)
})
use_python("/usr/local/bin/python3")
scvi <- import("scvi")
anndata <- import("anndata")
np <- import("numpy")
sc <- import("scanpy")
scvi_time <- system.time({
adata <- anndata$AnnData(X = as.sparse(t(counts(sce))),
obs = data.frame(cells = colnames(sce),
batch = sce$donor))
scvi$data$setup_anndata(adata, batch_key = "batch")
model <- scvi$model$SCVI(adata)
model$train(n_epochs = as.integer(60), n_epochs_kl_warmup = as.integer(30))
})
We can visualize the data using the labels from the original publication
library(scater)
reducedDim(sce, "scvi") <- model$get_latent_representation()
denoised <- t(model$get_normalized_expression(adata, library_size = 10e4))
dimnames(denoised) <- dimnames(counts(sce))
assay(sce, "denoised") <- log1p(denoised)
sce <- runTSNE(sce, dimred = "scvi")
plotTSNE(sce, colour_by = "label")

plotTSNE(sce, colour_by = "donor")

Running Dune
library(Dune)
df <- colData(sce)[, c("SC3", "seurat", "seurat_scvi")] %>%
as.matrix()
dune_time <- system.time(
merger <- Dune(clusMat = df, BPPARAM = BPPARAM, metric = "NMI")
)
plotPrePost(merger)

NMItrend(merger)

plotNMIs(merger$initialMat)

plotNMIs(merger$currentMat)

Picking the final clustering result to use
Visual pick
sce$seurat_Final <- as.factor(merger$currentMat$seurat)
plotTSNE(sce, colour_by = "seurat_Final")

sce$seurat_scvi_Final <- as.factor(merger$currentMat$seurat_scvi)
plotTSNE(sce, colour_by = "seurat_scvi_Final")

sce$SC3_Final <- as.factor(merger$currentMat$SC3)
plotTSNE(sce, colour_by = "SC3_Final")

Picking based on sihouette
library(cluster)
dist_mat <- dist(as.matrix(reducedDim(sce, "scvi")))
sils <- lapply(merger$currentMat %>% as.data.frame, function(label){
silhouette(label, dist = dist_mat)[,3] %>% mean()
}) %>% unlist()
sils
## SC3 seurat seurat_scvi
## -0.03325991 0.18725471 0.31371226
Runtimes
times <- c(pre_process_time[1],
scvi_time[1],
sc3_time[1],
seurat_time[1],
seurat_scvi_time[1],
dune_time[1])
names(times) <- c("Pre-Process",
"SCVI",
"Sc3",
"Seurat",
"Seurat after SCVI",
"Dune")
df <- data.frame(times = times,
Name = factor(names(times), levels = names(times)))
ggplot(df, aes(x = Name, y = times, fill = Name)) +
geom_col() +
theme_classic() +
labs(x = "Step", y = "Time (second)") +
scale_color_brewer(palette = "Dark2") +
scale_y_log10()

LS0tCmF1dGhvcjogIkhlY3RvciBSb3V4IGRlIELDqXppZXV4IgpkYXRlOiAnYHIgZm9ybWF0KFN5cy50aW1lKCksICIlZCAlQiAsICVZIilgJwp0aXRsZTogJ0R1bmUgd29ya2Zsb3cgb24gdGhlIGJhcm9uIGRhdGFzZXQnCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IFRSVUUKICAgIHRvY19kZXB0aDogMgogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiBUUlVFCi0tLQoKYGBge3IgbG9hZCBwYWNrYWdlcywgaW5jbHVkZT1GfQpsaWJyYXJ5KGtuaXRyKQpvcHRzX2NodW5rJHNldCgKICBmaWcucG9zID0gIiFoIiwgb3V0LmV4dHJhID0gIiIsIHdhcm5pbmcgPSBGLCBtZXNzYWdlID0gRiwKICBmaWcuYWxpZ24gPSAiY2VudGVyIgopCk5DT1JFUyA8LSAyCmBgYAoKIyBMb2FkIGRhdGEKCldlIGRvd25sb2FkIHRoZSBkYXRhIGZyb20gc2VnZXJzdG9scGUgZXQgYWwsIGhvc3RlZCBbaGVyZV0KCmBgYHtyfQpzdXBwcmVzc1BhY2thZ2VTdGFydHVwTWVzc2FnZXMoewogIGxpYnJhcnkoU2luZ2xlQ2VsbEV4cGVyaW1lbnQpCiAgbGlicmFyeShzdHJpbmdyKQogIGxpYnJhcnkoc2NSTkFzZXEpCn0pCnNjZSA8LSBCYXJvblBhbmNyZWFzRGF0YSgpCiMgV2Ugc2VsZWN0IHRoZSBsYXJnZXN0IGJhdGNoIHRvIGF2b2lkIHByZS1wcm9jZXNzaW5nIGlzc3VlcwojIHNjZSA8LSBzY2VbLCBzY2UkZG9ub3IgPT0gIkdTTTIyMzA3NTkiXQpmaWx0IDwtIHJvd1N1bXMoY291bnRzKHNjZSkgPj0gMikgPj0gMTAKc2NlIDwtIHNjZVtmaWx0LCBdCnNjZQpybShmaWx0KQpgYGAKCiMgUHJlLXByb2Nlc3NpbmcKClRoZSBkYXRhc2V0IGNhbiBiZSBub3JtYWxpemVkIHVzaW5nIFNDVkkgCgpgYGB7cn0Kc3VwcHJlc3NQYWNrYWdlU3RhcnR1cE1lc3NhZ2VzKHsKICBsaWJyYXJ5KFNldXJhdCkKfSkKcHJlX3Byb2Nlc3NfdGltZSA8LSBzeXN0ZW0udGltZSh7CiAgc2UgPC0gQ3JlYXRlU2V1cmF0T2JqZWN0KGNvdW50cyA9IGNvdW50cyhzY2UpLAogICAgICAgICAgICAgICAgICAgICAgICAgICBtaW4uY2VsbHMgPSAwLAogICAgICAgICAgICAgICAgICAgICAgICAgICBtaW4uZmVhdHVyZXMgPSAwLAogICAgICAgICAgICAgICAgICAgICAgICAgICBwcm9qZWN0ID0gImRlIikKICBzZSA8LSBBZGRNZXRhRGF0YShzZSwgYXMuZGF0YS5mcmFtZShjb2xEYXRhKHNjZSkpKQogIHNlIDwtIE5vcm1hbGl6ZURhdGEoc2UsIHZlcmJvc2UgPSBGQUxTRSkKICBzZSA8LSBGaW5kVmFyaWFibGVGZWF0dXJlcyhzZSwgc2VsZWN0aW9uLm1ldGhvZCA9ICd2c3QnLCBuZmVhdHVyZXMgPSA0MDAwLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIHZlcmJvc2UgPSBGQUxTRSkKICBzZSA8LSBzZVtWYXJpYWJsZUZlYXR1cmVzKHNlKSwgXQogIHNlIDwtIFNjYWxlRGF0YShvYmplY3QgPSBzZSwgdmFycy50by5yZWdyZXNzID0gYygibkNvdW50X1JOQSIsICJkb25vciIpKQogIHNjZSA8LSBhcy5TaW5nbGVDZWxsRXhwZXJpbWVudChzZSkKfSkKYGBgCgpgYGB7ciwgd2FybmluZz1GQUxTRX0Kc3VwcHJlc3NQYWNrYWdlU3RhcnR1cE1lc3NhZ2VzKHsKICBsaWJyYXJ5KHJldGljdWxhdGUpCn0pCnVzZV9weXRob24oIi91c3IvbG9jYWwvYmluL3B5dGhvbjMiKQpzY3ZpIDwtIGltcG9ydCgic2N2aSIpCmFubmRhdGEgPC0gaW1wb3J0KCJhbm5kYXRhIikKbnAgPC0gaW1wb3J0KCJudW1weSIpCnNjIDwtIGltcG9ydCgic2NhbnB5IikKc2N2aV90aW1lIDwtIHN5c3RlbS50aW1lKHsKICBhZGF0YSA8LSBhbm5kYXRhJEFubkRhdGEoWCA9IGFzLnNwYXJzZSh0KGNvdW50cyhzY2UpKSksCiAgICAgICAgICAgICAgICAgICAgICAgICAgIG9icyA9IGRhdGEuZnJhbWUoY2VsbHMgPSBjb2xuYW1lcyhzY2UpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGJhdGNoID0gc2NlJGRvbm9yKSkKICBzY3ZpJGRhdGEkc2V0dXBfYW5uZGF0YShhZGF0YSwgYmF0Y2hfa2V5ID0gImJhdGNoIikKICBtb2RlbCA8LSBzY3ZpJG1vZGVsJFNDVkkoYWRhdGEpCiAgbW9kZWwkdHJhaW4obl9lcG9jaHMgPSBhcy5pbnRlZ2VyKDYwKSwgbl9lcG9jaHNfa2xfd2FybXVwID0gYXMuaW50ZWdlcigzMCkpCn0pCmBgYAoKV2UgY2FuIHZpc3VhbGl6ZSB0aGUgZGF0YSB1c2luZyB0aGUgbGFiZWxzIGZyb20gdGhlIG9yaWdpbmFsIHB1YmxpY2F0aW9uCgpgYGB7cn0KbGlicmFyeShzY2F0ZXIpCnJlZHVjZWREaW0oc2NlLCAic2N2aSIpIDwtIG1vZGVsJGdldF9sYXRlbnRfcmVwcmVzZW50YXRpb24oKQpkZW5vaXNlZCA8LSB0KG1vZGVsJGdldF9ub3JtYWxpemVkX2V4cHJlc3Npb24oYWRhdGEsIGxpYnJhcnlfc2l6ZSA9IDEwZTQpKQpkaW1uYW1lcyhkZW5vaXNlZCkgPC0gZGltbmFtZXMoY291bnRzKHNjZSkpCmFzc2F5KHNjZSwgImRlbm9pc2VkIikgPC0gbG9nMXAoZGVub2lzZWQpCnNjZSA8LSBydW5UU05FKHNjZSwgZGltcmVkID0gInNjdmkiKQpwbG90VFNORShzY2UsIGNvbG91cl9ieSA9ICJsYWJlbCIpCnBsb3RUU05FKHNjZSwgY29sb3VyX2J5ID0gImRvbm9yIikKYGBgCgojIENyZWF0aW5nIGlucHV0cyB0byBEdW5lCiMjIHNjMwoKYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CnN1cHByZXNzUGFja2FnZVN0YXJ0dXBNZXNzYWdlcyhsaWJyYXJ5KFNDMykpCnNjM190aW1lIDwtIHN5c3RlbS50aW1lKHsKICBzY2Vfc2MzIDwtIHNjZQogIGxvZ2NvdW50cyhzY2Vfc2MzKSA8LSBhc3NheShzY2UsICJkZW5vaXNlZCIpCiAgcm93RGF0YShzY2Vfc2MzKSRmZWF0dXJlX3N5bWJvbCA8LSByb3duYW1lcyhzY2Vfc2MzKQogIGNvdW50cyhzY2Vfc2MzKSA8LSBhcy5tYXRyaXgoY291bnRzKHNjZV9zYzMpKQogIGxvZ2NvdW50cyhzY2Vfc2MzKSA8LSBhcy5tYXRyaXgobG9nY291bnRzKHNjZV9zYzMpKQogIHNjZV9zYzMgPC0gc2MzX2VzdGltYXRlX2soc2NlX3NjMykKICBLIDwtIG1ldGFkYXRhKHNjZV9zYzMpJHNjMyRrX2VzdGltYXRpb24KICAjIE5vdGU6IHdpdGggUiA+PSA0LjAsIFJTdHVkaW8gYW5kIE1hYyBPUywgdGhpcyBjYW4gZmFpbHMuIAogICMgQSB3b3JrYXJvdW5kIGlzIHJ1bm5pbmcgCiAgIyBwYXJhbGxlbDo6OnNldERlZmF1bHRDbHVzdGVyT3B0aW9ucyhzZXR1cF9zdHJhdGVneSA9ICJzZXF1ZW50aWFsIikKICBzY2Vfc2MzIDwtIHNjMyhzY2Vfc2MzLCBrcyA9IEssIG5fY29yZXMgPSBOQ09SRVMsIHJhbmRfc2VlZCA9IDc4NjkwNywKICAgICAgICAgICAgICAgICBzdm1fbnVtX2NlbGxzID0gcm91bmQoLjUgKiBuY29sKHNjZSkpKQogIHNjZV9zYzMgPC0gc2MzX3J1bl9zdm0oc2NlX3NjMywga3MgPSBLKSAgICAKICBzY2UkU0MzIDwtIGNvbERhdGEoc2NlX3NjMylbLCBwYXN0ZTAoInNjM18iLCBLLCAiX2NsdXN0ZXJzIildICU+JSBhcy5mYWN0b3IoKQp9KQpwbG90VFNORShzY2UsIGNvbG91cl9ieSA9ICJTQzMiKQpgYGAKCiMjIFNldXJhdCAKCmBgYHtyfQpzZXVyYXRfdGltZSA8LSBzeXN0ZW0udGltZSh7CiAgc2UgPC0gUnVuUENBKHNlLCB2ZXJib3NlID0gRkFMU0UpCiAgc2UgPC0gRmluZE5laWdoYm9ycyhzZSwgdmVyYm9zZSA9IEZBTFNFKQogIHNlIDwtIEZpbmRDbHVzdGVycyhvYmplY3QgPSBzZSwgdmVyYm9zZSA9IEZBTFNFKQogIHNjZSRzZXVyYXQgPC0gSWRlbnRzKHNlKQp9KQpwbG90VFNORShzY2UsIGNvbG91cl9ieSA9ICJzZXVyYXQiKQpgYGAKCiMjIFNldXJhdCB3aXRoIFNDVkkKCmBgYHtyfQpzZXVyYXRfc2N2aV90aW1lIDwtIHN5c3RlbS50aW1lKHsKICBzZXUgPC0gYXMuU2V1cmF0KHggPSBzY2UsIGNvdW50cyA9ICJjb3VudHMiLCBkYXRhID0gImNvdW50cyIpCiAgc2V1IDwtIEZpbmROZWlnaGJvcnMoc2V1LCByZWR1Y3Rpb24gPSAic2N2aSIsIHZlcmJvc2UgPSBGQUxTRSkKICBzZXUgPC0gRmluZENsdXN0ZXJzKG9iamVjdCA9IHNldSwgdmVyYm9zZSA9IEZBTFNFKQogIHNjZSRzZXVyYXRfc2N2aSA8LSBJZGVudHMoc2V1KQp9KQpwbG90VFNORShzY2UsIGNvbG91cl9ieSA9ICJzZXVyYXRfc2N2aSIpCmBgYAoKIyBSdW5uaW5nIER1bmUKCmBgYHtyfQpsaWJyYXJ5KER1bmUpCmRmIDwtIGNvbERhdGEoc2NlKVssIGMoIlNDMyIsICJzZXVyYXQiLCAic2V1cmF0X3NjdmkiKV0gJT4lCiAgYXMubWF0cml4KCkKZHVuZV90aW1lIDwtIHN5c3RlbS50aW1lKAogIG1lcmdlciA8LSBEdW5lKGNsdXNNYXQgPSBkZiwgQlBQQVJBTSA9IEJQUEFSQU0sIG1ldHJpYyA9ICJOTUkiKQopCmBgYAoKYGBge3J9CnBsb3RQcmVQb3N0KG1lcmdlcikKTk1JdHJlbmQobWVyZ2VyKQpgYGAKCmBgYHtyfQpwbG90Tk1JcyhtZXJnZXIkaW5pdGlhbE1hdCkKcGxvdE5NSXMobWVyZ2VyJGN1cnJlbnRNYXQpCmBgYAoKIyBQaWNraW5nIHRoZSBmaW5hbCBjbHVzdGVyaW5nIHJlc3VsdCB0byB1c2UKIyMgVmlzdWFsIHBpY2sKCmBgYHtyfQpzY2Ukc2V1cmF0X0ZpbmFsIDwtIGFzLmZhY3RvcihtZXJnZXIkY3VycmVudE1hdCRzZXVyYXQpCnBsb3RUU05FKHNjZSwgY29sb3VyX2J5ID0gInNldXJhdF9GaW5hbCIpCmBgYAoKYGBge3J9CnNjZSRzZXVyYXRfc2N2aV9GaW5hbCA8LSBhcy5mYWN0b3IobWVyZ2VyJGN1cnJlbnRNYXQkc2V1cmF0X3NjdmkpCnBsb3RUU05FKHNjZSwgY29sb3VyX2J5ID0gInNldXJhdF9zY3ZpX0ZpbmFsIikKYGBgCgpgYGB7cn0Kc2NlJFNDM19GaW5hbCA8LSBhcy5mYWN0b3IobWVyZ2VyJGN1cnJlbnRNYXQkU0MzKQpwbG90VFNORShzY2UsIGNvbG91cl9ieSA9ICJTQzNfRmluYWwiKQpgYGAKCiMjIFBpY2tpbmcgYmFzZWQgb24gc2lob3VldHRlCgpgYGB7cn0KbGlicmFyeShjbHVzdGVyKQpkaXN0X21hdCA8LSBkaXN0KGFzLm1hdHJpeChyZWR1Y2VkRGltKHNjZSwgInNjdmkiKSkpCnNpbHMgPC0gbGFwcGx5KG1lcmdlciRjdXJyZW50TWF0ICU+JSBhcy5kYXRhLmZyYW1lLCBmdW5jdGlvbihsYWJlbCl7CiAgc2lsaG91ZXR0ZShsYWJlbCwgZGlzdCA9IGRpc3RfbWF0KVssM10gJT4lICBtZWFuKCkKfSkgJT4lIHVubGlzdCgpCnNpbHMKYGBgCgojIFJ1bnRpbWVzCgpgYGB7cn0KdGltZXMgPC0gYyhwcmVfcHJvY2Vzc190aW1lWzFdLAogICAgICAgICAgIHNjdmlfdGltZVsxXSwKICAgICAgICAgICBzYzNfdGltZVsxXSwKICAgICAgICAgICBzZXVyYXRfdGltZVsxXSwKICAgICAgICAgICBzZXVyYXRfc2N2aV90aW1lWzFdLAogICAgICAgICAgIGR1bmVfdGltZVsxXSkKbmFtZXModGltZXMpIDwtIGMoIlByZS1Qcm9jZXNzIiwKICAgICAgICAgICAgICAgICAgIlNDVkkiLAogICAgICAgICAgICAgICAgICAiU2MzIiwKICAgICAgICAgICAgICAgICAgIlNldXJhdCIsCiAgICAgICAgICAgICAgICAgICJTZXVyYXQgYWZ0ZXIgU0NWSSIsCiAgICAgICAgICAgICAgICAgICJEdW5lIikKZGYgPC0gZGF0YS5mcmFtZSh0aW1lcyA9IHRpbWVzLAogICAgICAgICAgICAgICAgIE5hbWUgPSBmYWN0b3IobmFtZXModGltZXMpLCBsZXZlbHMgPSBuYW1lcyh0aW1lcykpKQpnZ3Bsb3QoZGYsIGFlcyh4ID0gTmFtZSwgeSA9IHRpbWVzLCBmaWxsID0gTmFtZSkpICsKICBnZW9tX2NvbCgpICsKICB0aGVtZV9jbGFzc2ljKCkgKwogIGxhYnMoeCA9ICJTdGVwIiwgeSA9ICJUaW1lIChzZWNvbmQpIikgKwogIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlID0gIkRhcmsyIikgKwogIHNjYWxlX3lfbG9nMTAoKQpgYGA=